version 11
#delim ;
/* Session housekeeping: turn off output paging, close any log that is
   still open, and clear the data in memory. The capture prefix keeps
   the script running when there is nothing to close or clear. */
set more off;
capture log close;
capture clear;

/* Point the ado-file search path at the ado tree under adodir. */
local adodir "C:\Scott\Synch\Stata\ado";
sysdir set OLDPLACE "`adodir'";
sysdir set PLUS     "`adodir'\plus";
sysdir set PERSONAL "`adodir'\personal";

*log using National_bank_analysis1.txt, text replace;
/***!***!***!***!***!*** [National_bank_analysis1.do ] ***!***!***!***!***!
*
* Project: National Banks 		
* Programmer:  Scott Fulford
*
* Date:    	 7/12/2010
*
* Auditor:      
* Audit Date:   
*
* Purpose:      
* 1) Create a data set with additional transformed variables and create tables of means and graphs 
* 2) 
* 3)
* Inputs: 
*		National_Banks_counties1890 created by National_Banks4.do
*
* Outputs: 
*		National_Banks_counties1890_addvar
*
*
***!***!***!***!***!***!***!***!***!***!***!***!***!***!***!***!***/


/***Define Global Directory ****/
/* Project directories. Only INDIR is referenced in this script as shown
   (it becomes the working directory, so the input data, the saved
   dataset and the exported CSV files all live there); the other locals
   are kept for consistency with the rest of the project. */
	local INDIR "C:\Scott\Research\National_Banks\Intermediate";
	local PROGDIR  "C:\Scott\Research\National_Banks\Programs";
	local OUTDIR  "C:\Scott\Research\National_Banks\Intermediate";
	local GRAPHDIR "C:\Scott\Research\National_Banks\Intermediate";
	local TEXDIR "C:\Scott\Research\National_Banks\Intermediate";
	
	
/*******************************/

set memory 800m;


/* The path is quoted so that cd keeps working if INDIR is ever pointed
   at a directory whose name contains spaces. */
cd "`INDIR'";
use National_Banks_counties1890, clear;

/* Variables for later decades are built here in wide form; they are not
   listed as stubs in the reshape further down. */
/* Log of 1970 total income divided by 1970 total population. */
gen ln_totinc_pc1970 = ln(totinc1970/tpop1970);

/* Replace the string identifier gisjoin1890 with a numeric one: drop the
   first character and convert the remainder to a number. The numeric
   version is built under a temporary name and then renamed into place. */
tempvar gisnum;
gen `gisnum' = real(substr(gisjoin1890, 2, .));
drop gisjoin1890;
rename `gisnum' gisjoin1890;
/* Remove a variable with this temporary-style name if the input data
   happens to contain one. */
capture drop __000001;

/**** Reserve Cities in 1890
**Central Reserve Cities 
Chicago
New York
Saint Louis

**Reserve Cities
Albany
Baltimore
Brooklyn
Boston
Cincinnati
Cleveland
Detroit
Kansas City
Louisville
Milwaukee
Minneapolis
New Orleans
Omaha
Philadelphia
Pittsburgh
Saint Joseph
Saint Paul
San Francisco
Washington

****/

/***
Union States during Civil War

CALIFORNIA
CONNECTICUT
DELAWARE
ILLINOIS
INDIANA
IOWA
KANSAS
KENTUCKY
MAINE
MARYLAND
MASSACHUSETTS
MICHIGAN
MINNESOTA
MISSOURI
NEVADA
NEW HAMPSHIRE
NEW JERSEY
NEW YORK
OHIO
OREGON
PENNSYLVANIA
RHODE ISLAND
VERMONT
WEST VIRGINIA
WISCONSIN

** Exclude only partially settled states: NEVADA, OREGON, CALIFORNIA
Include NEBRASKA (state in 1867)
**/

/* Flag counties whose statename is in the list below (unionstates = 1,
   otherwise 0).

   Multi-word state names MUST be quoted. foreach ... in splits its list
   on whitespace, so an unquoted NEW YORK is seen as the two elements
   NEW and YORK (neither matches a statename), and the VIRGINIA in an
   unquoted WEST VIRGINIA would match Virginia itself and flag it as a
   Union state. Quoted elements are passed to the loop as one string
   with the quotes removed. */
gen unionstates = 0;
foreach state in 
	CONNECTICUT
	DELAWARE
	ILLINOIS
	INDIANA
	IOWA
	KANSAS
	KENTUCKY
	MAINE
	MARYLAND
	MASSACHUSETTS
	MICHIGAN
	MINNESOTA
	MISSOURI
	NEBRASKA
	"NEW HAMPSHIRE"
	"NEW JERSEY"
	"NEW YORK"
	OHIO
	PENNSYLVANIA
	"RHODE ISLAND"
	VERMONT
	"WEST VIRGINIA"
	WISCONSIN 
	{;
		quietly replace unionstates = 1 if statename == "`state'";
	};
	
/* Set every variable matching manuf*1910 to missing for all
   observations, so that 1910 manufacturing is missing rather than 0. */
foreach manufvar of varlist manuf*1910 {;
	replace `manufvar' = .;
};

/***** Create some extra variables ***/
/* f_urban: urban population as a fraction of total population,
   one variable per census year 1860-1920. */
forvalues yr = 1860(10)1920 {;
	gen f_urban`yr' = pop_urban`yr'/tpop`yr';
};

/* tval: manufacturing value plus farm product value;
   f_manuf: manufacturing share of that total. */
forvalues yr = 1860(10)1920 {;
	gen tval`yr' = manuf_val`yr' + farmprod_val`yr';
	gen f_manuf`yr' = manuf_val`yr'/tval`yr';
};


/*Use to normalize the population size*/
local normalizepopulationbanks = 1000; 

/*Urban cutoff: exclude counties with large urban populations*/
local urbancutoff =50000;

/* pop_urban1880 matches the stub pop_urban in the reshape below and
   would be folded into the long variable pop_urban. Keep a copy under a
   temporary name so the 1880 value stays available as a county-level
   constant afterwards. */
tempvar popurban1880;
gen `popurban1880' = pop_urban1880;

/* Set the *1902 variables aside in their own file, keyed to year 1900,
   so they are not treated as an extra year by the reshape. */
tempfile tempfile1 tempfile1902;
save `tempfile1', replace;
keep gisjoin1890 capitalstock1902 banks50c1902 banks1902 banks25c1902;
gen year= 1900;
sort gisjoin1890 year;

save `tempfile1902', replace;

/* Back to the full data: drop the 1902 columns and go long, one row per
   county and census year. */
use `tempfile1', clear;
drop *1902;
reshape long
		tpop male_pop tval f_manuf f_urban manuf_estb livestock_val
		farmprod_val manuf_wages manuf_val area_imp_farm area_farm
		p_imp_farm farm_val farm_mach_val manuf_emp p_manuf_emp numfarms
		meanfarmsize gini_farmsize banks liabilities capitalstock loans
		banks50c banks25c pop_urban mean_dist_bank
		, i( gisjoin1890) j(year);
		
xtset  gisjoin1890 year, delta(10);

/**1902 variables

Note that the data is already xtset. 1902 does not imply the year of observation.
Instead, the 1902 suffix says that 1900 has been replaced by data from 1902 for counties
with banks of 50 or less. The 1902 variables are thus the same as the other variables for
all years except 1900. The thought experiment is what would have happened if the 1902 
rules had been in place between 1890 and 1900. 
***/

merge 1:1 gisjoin1890 year using `tempfile1902';
drop _merge;
/* Outside 1900 the 1902 variables simply copy the regular series. */
foreach thisvar in capitalstock banks50c banks banks25c {;
	replace `thisvar'1902 = `thisvar' if year != 1900;
};



/* Restore the 1880 urban population as a county-level constant, then
   drop the temporary copy. Left in place, it would be written into the
   saved dataset and reshaped into per-year junk columns later on. */
gen pop_urban1880 = `popurban1880';
drop `popurban1880';

/***** Create per capita ***/
/* Each listed variable divided by total population, stored as <name>_pc.
   The order of creation is kept as is because a later step refers to a
   variable range. */
local percapita_vars
	tval manuf_estb manuf_emp manuf_val
	farmprod_val livestock_val area_imp_farm farm_mach_val
	numfarms
	liabilities capitalstock loans;
foreach v of local percapita_vars {;
	gen `v'_pc = `v'/(tpop);
};

/***** Create logs  ***/
/* Natural log of each listed variable, stored as ln_<name>. */
local log_vars
	tval_pc manuf_val_pc farmprod_val_pc livestock_val_pc
	farm_mach_val_pc area_imp_farm_pc
	area_imp_farm manuf_estb numfarms meanfarmsize
	liabilities capitalstock loans
	liabilities_pc capitalstock_pc loans_pc
	tpop;
foreach v of local log_vars {;
	gen ln_`v' = ln(`v');
};

/* Bank counts per normalizepopulationbanks inhabitants, and the share
   of banks that are in the banks50c count. */
gen banks_pc = banks/(tpop/`normalizepopulationbanks');
gen banks50c_pc = banks50c/(tpop/`normalizepopulationbanks');
gen p_banks50c = banks50c/banks;

/* Farm product value per unit of improved farm area; values above 100
   are set to missing. */
gen farm_yield = farmprod_val/area_imp_farm;
replace farm_yield = . if farm_yield > 100;

/* groupa: 0 for city6to8_1880 counties, 1 for city4to6_1880 counties,
   in both cases only when 1880 urban population is at or below the
   cutoff; missing otherwise. */
gen groupa = 0 if city6to8_1880 == 1 & pop_urban1880 <= `urbancutoff';
replace groupa = 1 if city4to6_1880 == 1 & pop_urban1880 <= `urbancutoff';

/* Previous-period banks per capita (uses the xtset panel structure). */
gen Lbanks_pc = L.banks_pc;

/* Indicator: a positive, non-missing number of banks. */
gen Ibanks = (banks > 0 & !missing(banks));



/*****Selects sample for regressions****/
/* Both indicators are 0/1. A county counts as rural when its 1880 urban
   population is at or below urbancutoff. */

/* Rural counties in the flagged union states */
gen insample_unionrural = (pop_urban1880 <= `urbancutoff' & unionstates);

/* All rural counties (southern counties included) */
gen insample_allrural = (pop_urban1880 <= `urbancutoff');

label variable gisjoin1890 "1890 Counties";


/* Write the long panel with the added variables. */
save National_Banks_counties1890_addvar, replace;

/*To use in Arcmap, needs to be wide not long*/
tempfile alreadywide original;
save `original', replace;

/* County attributes that carry no year dimension. They are set aside
   (taken from the 1890 rows), removed before the reshape, and merged
   back in afterwards. */
local fixedvars
	tarea1890
	city4to6_1880 city6to8_1880 citygt4_1880
	city4to6_1870 city6to8_1870 citygt4_1870
	unionstates statename statenum countyname countynum;

keep if year == 1890;
keep gisjoin1890 `fixedvars' insample* groupa;
sort gisjoin1890;
save `alreadywide', replace;

/* Reshape everything from tpop through Ibanks (in dataset order) to wide. */
use `original', clear;
drop `fixedvars' pop_urban1880 insample* groupa;
sort gisjoin1890 year;
reshape wide  tpop- Ibanks, i(gisjoin1890) j(year);
merge 1:1 gisjoin1890 using `alreadywide';
drop _merge;

/* Rebuild the string identifier: "G" plus the number, with a "0" put
   after the "G" when the number is below 1000000. */
gen gisjoin = "G" + string(gisjoin1890) if gisjoin1890 >= 1000000;
replace gisjoin = "G0" + string(gisjoin1890) if gisjoin1890 < 1000000;
drop gisjoin1890;

/*Arcmap can only handle a smaller number of columns. Drop ln transformations which are not useful for graphing*/
drop ln_*;
capture drop  __000001*;

/* One CSV per census year, because ArcMap copes poorly with wide data.
   Each file holds gisjoin plus every variable whose name ends in that
   year. To include the non-changing data (insample, statename, ...),
   give them their own separate save.
   Note that each of these files is already converted into 1890 counties,
   so all of them should be matched to 1890 county shapefiles, not to the
   shapefiles of their original year. */
tempfile allyears;
save `allyears', replace;
forvalues thisyear = 1860(10)1920 {;
	use `allyears', clear;
	keep gisjoin *`thisyear';
	outsheet using "National_Banks_counties_addvar_`thisyear'.csv", comma replace;
};

use `original', clear;
/*****Display table of means*****/
/* Prints comma-separated rows to the results window.
   For each variable: one row of means, then one row of standard
   deviations. Each row holds the years 1870, 1880, 1890, 1900 for the
   allrural sample followed by the same four years for the unionrural
   sample. A final row gives the number of non-missing tpop observations
   in the same layout.

   The samples are selected with if qualifiers on summarize rather than
   by dropping observations and reloading the data from disk after every
   sample; the printed numbers are the same. */

/*Put things into dollar terms (multiplies the per capita values by 1000)*/
replace capitalstock_pc = capitalstock_pc*1000;
replace loans_pc = loans_pc*1000;

set linesize 255;
quietly {;
	foreach varname in tpop tval_pc manuf_val_pc farmprod_val_pc f_manuf  gini_farmsize p_imp_farm farm_yield banks mean_dist_bank banks_pc capitalstock_pc loans_pc /*liabilities_pc*/ {;

		/* First the row of means, then the row of standard deviations. */
		foreach stat in mean sd {;
			foreach thissample in allrural unionrural {;
				/* Lead-in: a separator before the second sample, padding before the first. */
				if "`thissample'" == "unionrural" {;
					noisily display ", " _continue;
				};
				else {;
					noisily display "  " _continue;
				};

				foreach year in 1870 1880 1890 1900 {;
					summarize `varname' if insample_`thissample' & year == `year';
					noisily display `r(`stat')' ", " _continue;
				};
			};
			noisily display;
		};
	};

	/* Closing row: observation counts, based on tpop. */
	foreach thissample in allrural unionrural {;
		if "`thissample'" == "unionrural" {;
			noisily display ", " _continue;
		};
		else {;
			noisily display "  " _continue;
		};

		foreach year in 1870 1880 1890 1900 {;
			summarize tpop if insample_`thissample' & year == `year';
			noisily display `r(N)' ", " _continue;
		};
	};
	noisily display;
};

/* Examine capital stock and total population
twoway (scatter capitalstock tpop, msize(tiny)) (function y=x*exp(-7.018 + .170+1.567/2+.693/2), range(tpop)) if insample_unionrural & year == 1880 & capitalstock<500;
*/
/* The log using command at the top of this file is commented out, so
   there may be no log open here. A bare log close would then stop the
   script with an error; capture makes the close a no-op in that case. */
capture log close;
exit;
